1 Neighbourhood boundaries

1.1 BCC suburbs

# orig data missing???
SUB <- read_csv("data-raw/geo/suburb-and-adjoining-suburb-november-2019.zip") %>% 
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>% 
  select(suburb_name) %>% 
  rename(SSC_NAME16 = suburb_name) %>%  
  distinct() %>% 
  mutate(SSC_NAME16 = str_to_title(SSC_NAME16)) %>% 
  arrange(SSC_NAME16)

write_rds(SUB, "data/geo/clean/SUB.Rds")

This might very well include areas with no pops (and therefore no SEIFA), for instance:

# A tibble: 2 x 1
  SSC_NAME16      
  <chr>           
1 Brisbane Airport
2 Port Of Brisbane

1.2 ABS

Names from Brisbane containing (Brisbane - Qld), names from Qld containing (Qld) have to be cleaned to match BCC data.

Mcdowall is called McDowall and Mount Coot-tha is Mount Coot-Tha - these have been unified as well.

unzip("data-raw/geo/1270055003_ssc_2016_aust_shape.zip", 
      exdir = "data-raw/geo")

SSC <- st_read("data-raw/geo/1270055003_ssc_2016_aust_shape/SSC_2016_AUST.shp", 
               stringsAsFactors = FALSE) %>% 
  mutate(SSC_CODE16 = as.integer(SSC_CODE16)) %>% 
  select(-STE_NAME16, -STE_CODE16, -AREASQKM16) %>%     
  st_transform(3112) %>%    
  filter(!st_is_empty(geometry)) %>% 
  mutate(SSC_NAME16_orig = SSC_NAME16) %>%  
  mutate(SSC_NAME16 = str_remove(SSC_NAME16, 
                                 fixed(" (Brisbane - Qld)"))) %>% 
  mutate(SSC_NAME16 = str_remove(SSC_NAME16, 
                                 fixed(" (Qld)"))) %>% 
  mutate(SSC_NAME16 = ifelse(SSC_NAME16 == "McDowall",
                             "Mcdowall", SSC_NAME16)) %>% 
  mutate(SSC_NAME16 = ifelse(SSC_NAME16 == "Mount Coot-tha",
                             "Mount Coot-Tha", SSC_NAME16))

# SSC <- rmapshaper::ms_simplify(SSC, keep = 0.05, weighting = 0.7) # default settings

write_rds(SSC, "data/geo/SSC_2016_AUST.Rds")

unlink("data-raw/geo/1270055003_ssc_2016_aust_shape", recursive = TRUE)

Stones Corner doesn’t exist in ABS but it does in BCC. It seems it’s part of Greenslopes.

# A tibble: 1 x 3
  SSC_NAME16    SSC_CODE16 SSC_NAME16_orig
  <chr>              <int> <chr>          
1 Stones Corner         NA <NA>           

After clean the datasets were merged.

Full map

2 SEIFA

SEIFA <- read_xls("data-raw/SEIFA/2033055001 - ssc indexes.xls", 
                  sheet = "Table 1", skip = 5, n_max = 13719, na = "-") %>% 
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>% 
  dplyr::rename(SSC_CODE16 = x1,
                SSC_NAME16 = x2,
                IRSD = score_3,
                IRSD_d = decile_4,
                IRSAD = score_5,
                IRSAD_d = decile_6,
                IER = score_7,
                IER_d = decile_8,
                IEO = score_9,
                IEO_d = decile_10,
                URP = x11,
                caution = x12) %>% 
  # deciles kept as integers here
  mutate(SSC_CODE16 = as.integer(SSC_CODE16),
         IRSD = as.integer(IRSD),
         IRSAD = as.integer(IRSAD),
         IER = as.integer(IER),
         IEO = as.integer(IEO),
         IRSD_d = as.integer(IRSD_d),
         IRSAD_d = as.integer(IRSAD_d),
         IER_d = as.integer(IER_d),
         IEO_d = as.integer(IEO_d),
         URP = as.integer(URP)
  ) %>% 
  mutate(caution = as.logical(ifelse(is.na(caution), "False", "True")))

2.1 Coverage

Comparing Australia & Brisbane, example of IRSD.

2.1.1 Frequencies

2.1.1.1 Australia

x <integer> 
# total N=13713 valid N=13691 mean=5.50 sd=2.87

Value |    N | Raw % | Valid % | Cum. %
---------------------------------------
    1 | 1369 |  9.98 |   10.00 |  10.00
    2 | 1369 |  9.98 |   10.00 |  20.00
    3 | 1370 |  9.99 |   10.01 |  30.01
    4 | 1370 |  9.99 |   10.01 |  40.01
    5 | 1367 |  9.97 |    9.98 |  50.00
    6 | 1370 |  9.99 |   10.01 |  60.00
    7 | 1369 |  9.98 |   10.00 |  70.00
    8 | 1369 |  9.98 |   10.00 |  80.00
    9 | 1371 | 10.00 |   10.01 |  90.02
   10 | 1367 |  9.97 |    9.98 | 100.00
 <NA> |   22 |  0.16 |    <NA> |   <NA>

2.1.1.2 Brisbane

x <integer> 
# total N=193 valid N=184 mean=7.79 sd=2.37

Value |  N | Raw % | Valid % | Cum. %
-------------------------------------
    1 |  3 |  1.55 |    1.63 |   1.63
    2 |  6 |  3.11 |    3.26 |   4.89
    3 |  5 |  2.59 |    2.72 |   7.61
    4 |  5 |  2.59 |    2.72 |  10.33
    5 | 13 |  6.74 |    7.07 |  17.39
    6 | 12 |  6.22 |    6.52 |  23.91
    7 | 28 | 14.51 |   15.22 |  39.13
    8 | 18 |  9.33 |    9.78 |  48.91
    9 | 33 | 17.10 |   17.93 |  66.85
   10 | 61 | 31.61 |   33.15 | 100.00
 <NA> |  9 |  4.66 |    <NA> |   <NA>

2.1.2 Frequencies

2.1.2.1 Australia

2.1.2.2 Brisbane

2.1.3 Frequencies

2.1.3.1 Australia

2.1.3.2 Brisbane

2.2 Missing

Few areas with missing SEIFA

          SSC_NAME16 IRSD_d IRSAD_d IER_d IEO_d URP
1        Banks Creek     NA      NA    NA    NA  NA
2   Brisbane Airport     NA      NA    NA    NA  NA
3         Eagle Farm     NA      NA    NA    NA  NA
4 Enoggera Reservoir     NA      NA    NA    10  25
5          Karawatha     NA      NA    NA    NA  NA
6          Larapinta     NA      NA    NA    NA  NA
7             Lytton     NA      NA    NA    NA  NA
8     Mount Coot-Tha     NA      NA    NA    NA  NA
9   Port Of Brisbane     NA      NA    NA    NA  NA

These were excluded.

2.3 Caution

Few cases with ABS flag caution.

     SSC_NAME16 IRSD IRSD_d IRSAD IRSAD_d  IER IER_d  IEO IEO_d URP
1        Bulwer 1014      6   996       6 1012     5 1024     7  49
2   Cowan Cowan 1014      6   996       6 1012     5 1024     7  28
3 England Creek 1004      5   978       5 1063     8  949     3  33
4     Kooringal 1014      6   996       6 1012     5 1024     7  45

Usually with very small pop numbers.

These remain included.

2.4 ‘Local’ deciles

Original values of indices were used to calculate ‘local deciles’ using SSCs for Brisbane only

SSC %<>% 
  mutate(IRSD_d_orig = IRSD_d,
         IRSD_d = ntile(IRSD, 10),
         IRSAD_d_orig = IRSAD_d,
         IRSAD_d = ntile(IRSAD, 10),
         IER_d_orig = IER_d,
         IER_d = ntile(IER, 10),
         IEO_d_orig = IEO_d,
         IEO_d = ntile(IEO, 10)) %>% 
  mutate(across(ends_with("_d"), factor)) %>% 
  mutate(across(ends_with("_d_orig"), factor)) %>% 
  relocate(IRSD_d_orig, .after = IRSD_d) %>% 
  relocate(IRSAD_d_orig, .after = IRSAD_d) %>% 
  relocate(IER_d_orig, .after = IER_d) %>% 
  relocate(IEO_d_orig, .after = IEO_d) 

write_rds(SSC, "data/geo/SSC.Rds")

Note: Deciles are represented as factor variable. Certain application could benefit from changing it to ordered?

Agreement across specific indices :

2.4.1 IRSD_d

IRSD_d_orig == IRSD_d <lgl> 
# total N=184 valid N=184 mean=0.11 sd=0.32

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 163 | 88.59 |   88.59 |  88.59
TRUE  |  21 | 11.41 |   11.41 | 100.00
<NA>  |   0 |  0.00 |    <NA> |   <NA>

2.4.2 IRSAD_d

IRSAD_d_orig == IRSAD_d <lgl> 
# total N=184 valid N=184 mean=0.11 sd=0.32

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 163 | 88.59 |   88.59 |  88.59
TRUE  |  21 | 11.41 |   11.41 | 100.00
<NA>  |   0 |  0.00 |    <NA> |   <NA>

2.4.3 IER_d

IER_d_orig == IER_d <lgl> 
# total N=184 valid N=184 mean=0.71 sd=0.45

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE |  53 | 28.80 |   28.80 |  28.80
TRUE  | 131 | 71.20 |   71.20 | 100.00
<NA>  |   0 |  0.00 |    <NA> |   <NA>

2.4.4 IEO_d

IEO_d_orig == IEO_d <lgl> 
# total N=184 valid N=184 mean=0.12 sd=0.33

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 162 | 88.04 |   88.04 |  88.04
TRUE  |  22 | 11.96 |   11.96 | 100.00
<NA>  |   0 |  0.00 |    <NA> |   <NA>

3 Dog cost

dog_cost <- read_xlsx("data-raw/costs/dog_expensive.xlsx") %>% 
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>% 
  select(-web_source) %>% 
  rename(dog_breed = breed) %>%  
  select(dog_breed) %>% 
  distinct() %>% 
  # correcting names for better matching - these one used in BCE
  mutate(
    dog_breed = case_when(
      dog_breed == "Hairless Chinese Crested" ~ "Chinese Crested Dog",
      dog_breed == "Saint Bernard" ~ "St Bernard",
      TRUE ~ as.character(dog_breed))
  ) %>% 
  # synonyms
  add_row(dog_breed = "Dogue de Bordeaux") %>% 
  add_row(dog_breed = "Bulldog") %>% 
  add_row(dog_breed = "British Bulldog") %>% 
  mutate(expensive = "yes") %>% 
  arrange(dog_breed)

Top 20 most expensive dogs (+3 synonyms!)

4 Dog insurance

Data scraped from https://top10petinsurance.com.au/pet-insurance-prices on the 30th March 2020

# packages needed
# install.packages("rvest")

p_load(rvest, tidyverse)

# scraping table done with this using chrome: https://www.r-bloggers.com/using-rvest-to-scrape-an-html-table/
url <- 'https://top10petinsurance.com.au/pet-insurance-prices/'   

dog_insurance <- url %>%
  xml2::read_html() %>%
  html_nodes(xpath='//*[@id="post-1016"]/div/table') %>%
  html_table()

dog_insurance <- dog_insurance[[1]]

head(dog_insurance)

write_rds(dog_insurance, "data-raw/costs/dog_insurance.Rds") # extracted on the 30 March 2020
dog_insurance <- read_rds("data-raw/costs/dog_insurance.Rds") %>% 
  as_tibble() %>% 
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>% 
  # select(-) %>% 
  mutate(average_accident_policy_cost_annual = 
           gsub(",", "",
                average_accident_policy_cost_annual, 
                fixed = TRUE),
         average_illness_policy_cost_annual = 
           gsub(",", "",
                average_illness_policy_cost_annual, 
                fixed = TRUE),
         average_comprehensive_policy_cost_annual = 
           gsub(",", "",
                average_comprehensive_policy_cost_annual, 
                fixed = TRUE)
  ) %>% 
  mutate(average_accident_policy_cost_annual = 
           as.numeric(gsub("$", "",
                           average_accident_policy_cost_annual, 
                           fixed = TRUE)),
         average_illness_policy_cost_annual = 
           as.numeric(gsub("$", "",
                           average_illness_policy_cost_annual, 
                           fixed = TRUE)),
         average_comprehensive_policy_cost_annual = 
           as.numeric(gsub("$", "",
                           average_comprehensive_policy_cost_annual, 
                           fixed = TRUE))
  )

4.1 Three major categories

cost_compared_to_other_breeds <character> 
# total N=558 valid N=558 mean=1.97 sd=0.42

Value                       |   N | Raw % | Valid % | Cum. %
------------------------------------------------------------
Above average               |  58 | 10.39 |   10.39 |  10.39
Below average               | 457 | 81.90 |   81.90 |  92.29
Significantly above average |  43 |  7.71 |    7.71 | 100.00
<NA>                        |   0 |  0.00 |    <NA> |   <NA>

4.2 Individual breeds

Breeds in Significantly above average category:

Breeds in Above average category:

5 Dog ownership in Brisbane

Raw dataset named cars-bis-open-data-animal-permits-3-jan-2019 consists of 107,405 records.

5.1 Data selection

Excluding records with:

  • permit_name: Breeders Permit, Cattery Permit, Racehorses Permit, Pet Shop Permit, Domestic Dog Permit & Guard Dog Permit
  • without neighbourhood
  • without dog_breed values
  • with dog_breed listed as Unknown or Cross

Stones Corner values were assigned to Greenslopes SSC (see information in section above).

dog_ownership %<>% 
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>% 
  # all the same here
  select(-permit_group, -permit_status) %>% 
  # special permits?
  filter(!permit_name %in% c("Breeders Permit", "Cattery Permit", "Racehorses Permit", "Pet Shop Permit")) %>% 
  filter(!permit_name %in% c("Domestic Dog Permit", "Guard Dog Permit")) %>%
  rename(dog_breed = animal_breed,
         SSC_NAME16 = application_location_suburb) %>% 
  mutate(SSC_NAME16 = str_to_title(SSC_NAME16)) %>% 
  # correct suburb
  mutate(SSC_NAME16 = ifelse(SSC_NAME16 == "Stones Corner", "Greenslopes", SSC_NAME16)) %>% 
  # missing geo
  filter(!is.na(SSC_NAME16)) %>% 
  # missing breed
  filter(!is.na(dog_breed)) %>% 
  filter(!dog_breed %in% c("Unknown", 
                           "Medium Cross Breed", "Small Cross Breed", "Large Cross Breed")) %>% 
  # few cleans for better matches
  mutate(
    dog_breed = case_when(
      dog_breed == "German Shepherd Dog (Long Stock Coat)" ~ "German Shepherd",
      dog_breed == "German Shepherd Dog" ~ "German Shepherd",
      dog_breed == "Central Asian Shepherd Dog " ~ "Central Asian Shepherd",
      dog_breed == "Kangal Dog" ~ "Kangal",
      dog_breed == "Bulldog" ~ "British Bulldog",
      dog_breed == "Collie (Rough)" ~ "Rough Collie", 
      dog_breed == "Collie (Smooth)" ~ "Smooth Collie",      
      
      TRUE ~ as.character(dog_breed))
  ) 

Prepared dataset consists of 106,018 records.

5.2 Data merging - dog_cost

dog_ownership_cost <- left_join(dog_ownership, dog_cost, by = "dog_breed") %>% 
  # binary indicator "expensive" and "non-expensive" dog breeds according to dog_cost
  mutate(
    expensive = case_when(
      is.na(expensive) ~ 0,
      expensive == "yes" ~ 1
    )
  )

Expensive dogs in Brisbane:

expensive <numeric> 
# total N=106018 valid N=106018 mean=0.14 sd=0.35

Value |     N | Raw % | Valid % | Cum. %
----------------------------------------
    0 | 90940 | 85.78 |   85.78 |  85.78
    1 | 15078 | 14.22 |   14.22 | 100.00
 <NA> |     0 |  0.00 |    <NA> |   <NA>

5.3 Data merging - dog_insurance

Correcting names for better match

dog_insurance %<>%
  mutate(
    dog_breed = case_when(
      # this is a bit tricky! might need sensitivity?
      dog_breed == "French Poodle" ~ "Poodle",  
      
      # just naming issues
      dog_breed == "Poodle – Standard" ~ "Poodle (Standard)",
      dog_breed == "Miniature Poodle" ~ "Poodle (Miniature)",
      dog_breed == "Poodle – Toy" ~ "Poodle (Toy)",
      dog_breed == "Shar-Pei" ~ "Shar Pei", 
      dog_breed == "German Short Haired Pointer" ~ "German Shorthaired Pointer", 
      dog_breed == "German Wire Haired Pointer" ~ "German Wirehaired Pointer", 
      dog_breed == "Collie – Rough" ~ "Rough Collie", 
      dog_breed == "Collie – Smooth" ~ "Smooth Collie", 
      dog_breed == "Miniature Schnauzer" ~ "Schnauzer (Miniature)", 
      dog_breed == "Schnauzer Giant" ~ "Schnauzer (Giant)", 
      dog_breed == "Lagotto Rom" ~ "Lagotto Romagnolo", 
      dog_breed == "Brittany Spaniel" ~ "Brittany", 
      dog_breed == "Staghound" ~ "Stag Hound", 
      dog_breed == "Kerry Blue" ~ "Kerry Blue Terrier", 
      dog_breed == "English Toy terrier" ~ "English Toy Terrier",
      dog_breed == "Parson Jack Russell Terrier" ~ "Parson Russell Terrier", 
      dog_breed == "Welsh Corgi – Pembroke" ~ "Welsh Corgi (Pembroke)", 
      dog_breed == "American Cocker Spaniel" ~ "Cocker Spaniel (American)",
      dog_breed == "Basset Fauve De Bretagne" ~ "Basset Fauve de Bretagne", 
      dog_breed == "Norwegian Elk Hound" ~ "Norwegian Elkhound", 
      dog_breed == "Cheasapeake Bay Retriever" ~ "Chesapeake Bay Retriever", 
      dog_breed == "Bouvier Des Flandres" ~ "Bouvier des Flandres", 
      dog_breed == "Miniature Bull Terrier" ~ "Bull Terrier (Miniature)", 
      dog_breed == "Munsterlander – Large" ~ "Large Munsterlander", 
      dog_breed == "Welsh Corgi – Cardigan" ~ "Welsh Corgi (Cardigan)", 
      dog_breed == "HamiltonStovare" ~ "Hamiltonstovare",
      dog_breed == "Blue tick Coonhound" ~ "Bluetick Coonhound",
      dog_breed == "Japanese Akita" ~ "Akita (Japanese)",
      dog_breed == "Dogue De Bordeaux" ~ "Dogue de Bordeaux",
      dog_breed == "Italian Cane Corso" ~ "Italian Corso Dog",
      
      # different kelpies but same category anyway
      dog_breed == "Australian Kelpie Sheepdog" ~ "Australian Kelpie", 
      
      # typo
      dog_breed == "Neopolitan Mastiff" ~ "Neapolitan Mastiff", 
      TRUE ~ as.character(dog_breed)))

Dog breeds without match

Some further corrections still possible here:

dog_ownership_cost <-  left_join(dog_ownership_cost, 
                                 dog_insurance %>% 
                                   select(dog_breed, cost_compared_to_other_breeds)
) %>% 
  mutate(
    cost_compared_to_other_breeds = case_when(
      # all the same
      dog_breed == "Fox Terrier" ~ "Below average",
      dog_breed == "Fox Terrier (Smooth)" ~ "Below average",
      dog_breed == "Fox Terrier (Wire)" ~ "Below average",
      dog_breed == "Schnauzer" ~ "Below average",
      dog_breed == "Australian Stumpy Tail Cattle Dog" ~ "Below average",
      dog_breed == "Foxhound" ~ "Below average",
      dog_breed == "White Swiss Shepherd Dog" ~ "Below average",
      
      # multiple options here, but all above going for conservative
      dog_breed == "Welsh Corgi" ~ "Above average",
      
      # taking values from Chihuahua
      dog_breed == "Chihuahua (Smooth Coat)" ~ "Below average",
      dog_breed == "Chihuahua (Long Coat)" ~ "Below average",
      dog_breed == "Chihuahua (Long Coat)" ~ "Below average",
      
      # naming - Dogue De Bordeaux
      dog_breed == "French Mastiff" ~ "Significantly above average",
      TRUE ~ as.character(cost_compared_to_other_breeds))) %>% 
  select(-permit_name)

Few things left:

Two largest groups:

  • Pointer - insufficient info!
  • Akita - insufficient info! Could be Inu, could be Japanese

These observations will remain as NAs.

6 Computing Environment

 R version 4.1.2 (2021-11-01)
 Platform: x86_64-w64-mingw32/x64 (64-bit)
 Running under: Windows 10 x64 (build 18363)
 
 Matrix products: default
 
 attached base packages:
 [1] stats     graphics  grDevices utils     datasets  methods   base     
 
 other attached packages:
  [1] tmap_3.3-2      sf_1.0-5        gmodels_2.18.1  DT_0.20        
  [5] sjPlot_2.8.10   sjmisc_2.8.9    janitor_2.1.0   scales_1.1.1   
  [9] magrittr_2.0.1  readxl_1.3.1    forcats_0.5.1   stringr_1.4.0  
 [13] dplyr_1.0.7     purrr_0.3.4     readr_2.1.1     tidyr_1.1.4    
 [17] tibble_3.1.6    ggplot2_3.3.5   tidyverse_1.3.1 pacman_0.5.1   
 
To cite R in publications use:

R Core Team (2021). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. https://www.R-project.org/.

To cite the ggplot2 package in publications use:

Wickham H (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4, https://ggplot2.tidyverse.org.